/*****************************************************************************************************************************************
******Code for Currie, Mueller-Smith, Rossin-Slater "Violence while in Utero: The Impacts of Assaults During Pregnancy on Birth Outcomes"**
******July 2020***********************************************************************************************************************/

*****This code cleans the merged NYC crimes/births data and conducts analyses to produce the following output in the manuscript:

*** APPENDIX TABLE A6 (subsequent fertility)

* ---------------------------------------------------------------------------
* Session setup: settings, path globals, graph scheme, and loading the merged
* births/crimes file.
* ---------------------------------------------------------------------------
set more off
set matsize 10000

clear all

* Close any log left open by a previous run; -cap- ignores the error when no log is open.
cap log close

***** Paths to data set locations, results graphs *****
* Fill these in before running. They are prepended directly to file names
* (e.g. "${bulk}births_crimes.dta", "${results}subsequentfertility.tex"),
* so each non-empty path must end with a directory separator.
* Left empty, files are read from / written to the current working directory.
global bulk 
global home 
global results 
global graphs

**** first, run clean_birthoutcomes.do file to clean the births data, then the mergebirths_tocrime.do file to merge births data to crime data

set scheme s1color

* Quote the path so that directories containing spaces do not break -use-.
use "${bulk}births_crimes.dta", clear

* Analysis window: conceptions from 2004 through 2012 (inclusive).
* Observations with a missing conception year fall outside the range and are dropped.
keep if inrange(concep_year, 2004, 2012)

*** Singleton births only: drop everything not flagged singleton==1 (including missing).
drop if singleton != 1

********************************************************
***** LINKING SIBLINGS ****
********************************************************

* Builds one mother identifier (mom_id) so that births to the same mother can
* be counted. The anonymized SSN is used when available; otherwise the
* anonymized maiden name + date of birth is used. numsibs ends up as the
* number of births in the current sample that share the same mom_id.

*Use mom's (anonymized) SSN
* Stored as double: a bare -generate- would create a float, which represents
* integers exactly only up to 16,777,216. Larger IDs would be rounded and
* different mothers could silently end up with the same ID.
gen double mom_ssn_id = M_SSN_ID

bys mom_ssn_id: gen numsibs =_N

tab numsibs

*A few errors, replace as missing:
* NOTE(review): 205644 and 430 are group sizes read off the -tab numsibs-
* output above, i.e. implausibly many births sharing one SSN value (presumably
* placeholder/invalid codes). They are specific to this data extract and must
* be re-checked if the input file or the sample restrictions change.
codebook mom_ssn_id if numsibs==205644

codebook mom_ssn_id if numsibs==430

replace mom_ssn_id = . if numsibs==205644 | numsibs==430

*for those who are missing SSN, use mom's full maiden name + DOB (anonymized)

egen mom_nameDOB_id = group(M_FNAME_ANON M_MNAME_ANON M_LNAME_ANON M_DOBM_ANON M_DOBDD_ANON M_DOBYY_ANON), missing

sum mom_nameDOB_id
*max is 1,683,249 -- so will add 2,000,000 to the SSN to make sure we have unique IDs
local max_nameDOB_id = r(max)

* Guard the hard-coded offset: every shifted SSN-based ID must lie strictly
* above the largest name/DOB-based ID, otherwise the two ID spaces overlap
* and unrelated mothers would be linked.
assert mom_ssn_id + 2000000 > `max_nameDOB_id' if !missing(mom_ssn_id)

* double for the same precision reason as mom_ssn_id above.
gen double mom_id = .
gen str mom_id_source = ""
replace mom_id = mom_nameDOB_id if mom_ssn_id==.
replace mom_id_source = "name/DOB" if mom_ssn_id==.
replace mom_id = mom_ssn_id + 2000000 if mom_ssn_id!=.
replace mom_id_source = "SSN" if mom_ssn_id!=.


* Recount births per mother using the combined identifier.
drop numsibs

bys mom_id: gen numsibs = _N
tab numsibs

label var numsibs "Number Siblings"

*** Reduce the data to one row per mother: her earliest birth in the sample.

* Monthly date of the child's birth, used to order births within mother.
gen child_ym = ym(birth_year, birth_month)
format child_ym %tm

* Within each mom_id, order by birth month and retain only the first row.
* NOTE(review): if a mother has two rows with the same child_ym, which one is
* kept is not determined by this sort -- confirm such ties do not occur.
bysort mom_id (child_ym): keep if _n == 1

**** Retain only rows flagged house==1 (single family home, per the original
**** author's note); rows with house missing are dropped as well.
drop if house != 1

* Report the remaining sample size and the number of distinct mothers.
count
codebook mom_id


/***** BBQ SAMPLE: Bronx, Brooklyn, and Queens only ****/
* Make sure the borough code is numeric before comparing it to numbers.
destring mom_borough, replace

* BBQ = 1 for borough codes 2, 3 or 4; 0 otherwise (including missing codes).
gen BBQ = inlist(mom_borough, 2, 3, 4)

/******************** Restrict to the BBQ sample **************/
keep if BBQ


/******** Additional Variables ************/

* Missing-value checks below use missing(), which is true for the system
* missing value (.) and for extended missing values (.a-.z). Testing "==."
* would leave extended missing values unflagged: such a row would fall in no
* parity group, and would not be recoded for the foreign-born variables.

**** parity: mutually exclusive indicators based on number of previous live births
gen parity1 = prevlivebirths==0
gen parity2 = prevlivebirths==1
gen parity3plus = prevlivebirths>=2 & !missing(prevlivebirths)
gen paritymiss = missing(prevlivebirths)


***mom foreign: flag missing values, then recode them as 0
gen mom_foreignmiss = missing(mom_foreign)
replace mom_foreign = 0 if missing(mom_foreign)

***dad foreign: flag missing values, then recode them as 0
gen dad_foreignmiss = missing(dad_foreign)
replace dad_foreign = 0 if missing(dad_foreign)

**** father info missing: 1 only when age, education, race AND foreign-born status are all missing
gen dadmiss = (dadagemiss==1 & dadeducmiss==1 & dadmissingrace==1 & dad_foreignmiss==1)
tab dadmiss

label var dadmiss "Father Info Missing"


/*************** CRIME VARIABLES *************/

* Lists of crime classes and crime types; only crimetypes is looped over below.
global crimeclass felony misdemeanor violation
global crimetypes assault


**** NOTE: totals during pregnancy (tot<type>_preg) are computed upstream in the mergebirths_tocrime.do file


* Total crimes in the 10 months after pregnancy: sum of the monthly counts post10 through post19.
* egen's rowtotal() treats missing monthly counts as zero.
foreach type of global crimetypes {
    local postmonths
    forvalues m = 10/19 {
        local postmonths `postmonths' tot`type'_post`m'
    }
    egen tot`type'_post = rowtotal(`postmonths')
}

* 0/1 indicators for at least one crime in each window; left missing when the total is missing.
foreach type of global crimetypes {
    gen any`type'_preg = (tot`type'_preg > 0) if !missing(tot`type'_preg)
    gen any`type'_post = (tot`type'_post > 0) if !missing(tot`type'_post)
}

summarize anyassault_preg
summarize anyassault_post


label variable anyassault_preg "Assault During First Pregnancy"




* Covariate list shared by the regressions below, grouped by theme.
* NOTE(review): singleton is constant after the earlier "keep if singleton==1",
* so it carries no variation in this sample.
* NOTE(review): mom_foreign / dad_foreign have missing values recoded to 0
* earlier in this file, but the matching flags mom_foreignmiss / dad_foreignmiss
* are not part of this list -- confirm that this is intentional.
global controls ///
    momage1 momage2 momage3 momage4 momagemiss ///
    dadage1 dadage2 dadage3 dadage4 dadagemiss ///
    mom_married mom_foreign dad_foreign ///
    momnonhispanicwhite momhispanic momnonhispanicblack momotherrace mommissingrace ///
    dadnonhispanicwhite dadhispanic dadnonhispanicblack dadotherrace dadmissingrace ///
    momeduc1 momeduc2 momeduc3 momeduc4 momeducmiss ///
    dadeduc1 dadeduc2 dadeduc3 dadeduc4 dadeducmiss ///
    singleton parity1 parity2 parity3plus paritymiss


**** Does assault during first pregnancy predict subsequent fertility?

* Outcomes are built from numsibs, the number of sample births linked to the mother:
*   anymorekids   = 1 if more than one birth is linked to her
*   numfuturekids = number of linked births beyond the one kept in the data
gen anymorekids = (numsibs > 1)
gen numfuturekids = numsibs - 1

eststo clear

* Same specification for both outcomes: controls plus conception year and month
* indicators, borough absorbed, robust standard errors. After each fit, the
* mean of the outcome over the estimation sample is attached as scalar "dv".
foreach outcome in anymorekids numfuturekids {
    eststo: areg `outcome' anyassault_preg $controls i.concep_year i.concep_month, absorb(mom_borough) vce(robust)
    quietly summarize `outcome' if e(sample)
    estadd scalar dv=`r(mean)'
}

* Write the two-column table, reporting only the assault coefficient.
esttab using "${results}subsequentfertility.tex", replace fragment booktabs keep(anyassault_preg) ///
    se star(* .1 ** .05 *** .01) stats(dv N,  label("Dept. var mean" "Indiv. obs.")) ///
    mtitles("Any More Kids" "Number Future Kids") ///
    wrap label varwidth(20) brackets


clear
